Re-training multiple layers of Inception

Note: ignore the numeric results. This repo only demonstrates the fine-tuning process.


In [1]:
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K


Using TensorFlow backend.

In [3]:
base_model = InceptionV3(weights='imagenet', include_top=False)

In [4]:
# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(1024, activation='relu')(x)
# and a logistic layer -- let's say we have 2 classes
predictions = Dense(2, activation='softmax')(x)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

In [5]:
# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False
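
As a quick sanity check (not part of the original run), you can confirm that only the newly added head is still trainable:

# hedged check: after freezing the base, only the new head should be trainable
trainable_names = [layer.name for layer in model.layers if layer.trainable]
print(len(trainable_names), 'trainable layers:', trainable_names)
# expected: the GlobalAveragePooling2D layer and the two Dense layers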

In [6]:
# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
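
The labels are one-hot encoded, hence categorical_crossentropy. A variant of the same compile call that also reports accuracy each epoch (not used in this run):

# optional: add an accuracy metric for easier monitoring
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])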

In [7]:
import numpy as np

In [8]:
# load the pre-built training arrays: images and their one-hot labels
data = np.load('train.npy')
labels = np.load('tlabels.npy')
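
How train.npy and tlabels.npy were built is not shown in this repo. A minimal sketch of how such arrays could be prepared, assuming a hypothetical images/<class_name>/<file>.jpg layout, 299x299 inputs (InceptionV3's default size), and Keras' own preprocessing helpers:

# hypothetical preparation of train.npy / tlabels.npy -- not part of this repo
import os
import numpy as np
from keras.preprocessing import image
from keras.applications.inception_v3 import preprocess_input
from keras.utils import to_categorical

root = 'images'  # placeholder directory: images/<class_name>/<file>.jpg
classes = sorted(os.listdir(root))
arrays, class_ids = [], []
for idx, cls in enumerate(classes):
    for fname in os.listdir(os.path.join(root, cls)):
        img = image.load_img(os.path.join(root, cls, fname), target_size=(299, 299))
        arrays.append(image.img_to_array(img))
        class_ids.append(idx)

data = preprocess_input(np.array(arrays))          # scale pixels to [-1, 1]
labels = to_categorical(class_ids, num_classes=2)  # one-hot, matching the 2-way softmax
np.save('train.npy', data)
np.save('tlabels.npy', labels)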

In [10]:
# arguments trimmed to the non-defaults; verbose=1 and shuffle=True are Keras defaults
model.fit(data, labels, batch_size=25, epochs=10, validation_split=0.3)


Train on 1409 samples, validate on 604 samples
Epoch 1/10
1409/1409 [==============================] - 103s - loss: 1.1922 - val_loss: 0.5801
Epoch 2/10
1409/1409 [==============================] - 104s - loss: 0.5765 - val_loss: 0.4961
Epoch 3/10
1409/1409 [==============================] - 102s - loss: 0.5294 - val_loss: 1.9590
Epoch 4/10
1409/1409 [==============================] - 100s - loss: 0.5085 - val_loss: 0.8440
Epoch 5/10
1409/1409 [==============================] - 104s - loss: 0.4545 - val_loss: 0.3225
Epoch 6/10
1409/1409 [==============================] - 104s - loss: 0.4293 - val_loss: 0.3479
Epoch 7/10
1409/1409 [==============================] - 104s - loss: 0.4048 - val_loss: 0.3710
Epoch 8/10
1409/1409 [==============================] - 99s - loss: 0.3390 - val_loss: 0.3841
Epoch 9/10
1409/1409 [==============================] - 102s - loss: 0.3319 - val_loss: 0.7677
Epoch 10/10
1409/1409 [==============================] - 105s - loss: 0.3392 - val_loss: 0.5102
Out[10]:
<keras.callbacks.History at 0x7f07159eba90>
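
The validation loss above fluctuates (note the spike at epoch 3), so in practice you would likely keep the best weights via callbacks rather than just taking the last epoch. A sketch (not used in this run; the checkpoint filename is a placeholder):

from keras.callbacks import EarlyStopping, ModelCheckpoint

callbacks = [
    EarlyStopping(monitor='val_loss', patience=3),
    ModelCheckpoint('top_layers.h5', monitor='val_loss', save_best_only=True),
]
model.fit(data, labels, batch_size=25, epochs=10,
          validation_split=0.3, callbacks=callbacks)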

In [11]:
# at this point, the top layers are well trained and we can start fine-tuning
# convolutional layers from Inception V3. We will freeze the bottom N layers
# and train the remaining top layers.

# let's visualize layer names and layer indices to see how many layers
# we should freeze:
for i, layer in enumerate(base_model.layers):
    print(i, layer.name)

# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 249 layers and unfreeze the rest:
for layer in model.layers[:249]:
    layer.trainable = False
for layer in model.layers[249:]:
    layer.trainable = True


(0, 'input_1')
(1, 'conv2d_1')
(2, 'batch_normalization_1')
(3, 'activation_1')
(4, 'conv2d_2')
(5, 'batch_normalization_2')
(6, 'activation_2')
(7, 'conv2d_3')
(8, 'batch_normalization_3')
(9, 'activation_3')
(10, 'max_pooling2d_1')
(11, 'conv2d_4')
(12, 'batch_normalization_4')
(13, 'activation_4')
(14, 'conv2d_5')
(15, 'batch_normalization_5')
(16, 'activation_5')
(17, 'max_pooling2d_2')
(18, 'conv2d_9')
(19, 'batch_normalization_9')
(20, 'activation_9')
(21, 'conv2d_7')
(22, 'conv2d_10')
(23, 'batch_normalization_7')
(24, 'batch_normalization_10')
(25, 'activation_7')
(26, 'activation_10')
(27, 'average_pooling2d_1')
(28, 'conv2d_6')
(29, 'conv2d_8')
(30, 'conv2d_11')
(31, 'conv2d_12')
(32, 'batch_normalization_6')
(33, 'batch_normalization_8')
(34, 'batch_normalization_11')
(35, 'batch_normalization_12')
(36, 'activation_6')
(37, 'activation_8')
(38, 'activation_11')
(39, 'activation_12')
(40, 'mixed0')
(41, 'conv2d_16')
(42, 'batch_normalization_16')
(43, 'activation_16')
(44, 'conv2d_14')
(45, 'conv2d_17')
(46, 'batch_normalization_14')
(47, 'batch_normalization_17')
(48, 'activation_14')
(49, 'activation_17')
(50, 'average_pooling2d_2')
(51, 'conv2d_13')
(52, 'conv2d_15')
(53, 'conv2d_18')
(54, 'conv2d_19')
(55, 'batch_normalization_13')
(56, 'batch_normalization_15')
(57, 'batch_normalization_18')
(58, 'batch_normalization_19')
(59, 'activation_13')
(60, 'activation_15')
(61, 'activation_18')
(62, 'activation_19')
(63, 'mixed1')
(64, 'conv2d_23')
(65, 'batch_normalization_23')
(66, 'activation_23')
(67, 'conv2d_21')
(68, 'conv2d_24')
(69, 'batch_normalization_21')
(70, 'batch_normalization_24')
(71, 'activation_21')
(72, 'activation_24')
(73, 'average_pooling2d_3')
(74, 'conv2d_20')
(75, 'conv2d_22')
(76, 'conv2d_25')
(77, 'conv2d_26')
(78, 'batch_normalization_20')
(79, 'batch_normalization_22')
(80, 'batch_normalization_25')
(81, 'batch_normalization_26')
(82, 'activation_20')
(83, 'activation_22')
(84, 'activation_25')
(85, 'activation_26')
(86, 'mixed2')
(87, 'conv2d_28')
(88, 'batch_normalization_28')
(89, 'activation_28')
(90, 'conv2d_29')
(91, 'batch_normalization_29')
(92, 'activation_29')
(93, 'conv2d_27')
(94, 'conv2d_30')
(95, 'batch_normalization_27')
(96, 'batch_normalization_30')
(97, 'activation_27')
(98, 'activation_30')
(99, 'max_pooling2d_3')
(100, 'mixed3')
(101, 'conv2d_35')
(102, 'batch_normalization_35')
(103, 'activation_35')
(104, 'conv2d_36')
(105, 'batch_normalization_36')
(106, 'activation_36')
(107, 'conv2d_32')
(108, 'conv2d_37')
(109, 'batch_normalization_32')
(110, 'batch_normalization_37')
(111, 'activation_32')
(112, 'activation_37')
(113, 'conv2d_33')
(114, 'conv2d_38')
(115, 'batch_normalization_33')
(116, 'batch_normalization_38')
(117, 'activation_33')
(118, 'activation_38')
(119, 'average_pooling2d_4')
(120, 'conv2d_31')
(121, 'conv2d_34')
(122, 'conv2d_39')
(123, 'conv2d_40')
(124, 'batch_normalization_31')
(125, 'batch_normalization_34')
(126, 'batch_normalization_39')
(127, 'batch_normalization_40')
(128, 'activation_31')
(129, 'activation_34')
(130, 'activation_39')
(131, 'activation_40')
(132, 'mixed4')
(133, 'conv2d_45')
(134, 'batch_normalization_45')
(135, 'activation_45')
(136, 'conv2d_46')
(137, 'batch_normalization_46')
(138, 'activation_46')
(139, 'conv2d_42')
(140, 'conv2d_47')
(141, 'batch_normalization_42')
(142, 'batch_normalization_47')
(143, 'activation_42')
(144, 'activation_47')
(145, 'conv2d_43')
(146, 'conv2d_48')
(147, 'batch_normalization_43')
(148, 'batch_normalization_48')
(149, 'activation_43')
(150, 'activation_48')
(151, 'average_pooling2d_5')
(152, 'conv2d_41')
(153, 'conv2d_44')
(154, 'conv2d_49')
(155, 'conv2d_50')
(156, 'batch_normalization_41')
(157, 'batch_normalization_44')
(158, 'batch_normalization_49')
(159, 'batch_normalization_50')
(160, 'activation_41')
(161, 'activation_44')
(162, 'activation_49')
(163, 'activation_50')
(164, 'mixed5')
(165, 'conv2d_55')
(166, 'batch_normalization_55')
(167, 'activation_55')
(168, 'conv2d_56')
(169, 'batch_normalization_56')
(170, 'activation_56')
(171, 'conv2d_52')
(172, 'conv2d_57')
(173, 'batch_normalization_52')
(174, 'batch_normalization_57')
(175, 'activation_52')
(176, 'activation_57')
(177, 'conv2d_53')
(178, 'conv2d_58')
(179, 'batch_normalization_53')
(180, 'batch_normalization_58')
(181, 'activation_53')
(182, 'activation_58')
(183, 'average_pooling2d_6')
(184, 'conv2d_51')
(185, 'conv2d_54')
(186, 'conv2d_59')
(187, 'conv2d_60')
(188, 'batch_normalization_51')
(189, 'batch_normalization_54')
(190, 'batch_normalization_59')
(191, 'batch_normalization_60')
(192, 'activation_51')
(193, 'activation_54')
(194, 'activation_59')
(195, 'activation_60')
(196, 'mixed6')
(197, 'conv2d_65')
(198, 'batch_normalization_65')
(199, 'activation_65')
(200, 'conv2d_66')
(201, 'batch_normalization_66')
(202, 'activation_66')
(203, 'conv2d_62')
(204, 'conv2d_67')
(205, 'batch_normalization_62')
(206, 'batch_normalization_67')
(207, 'activation_62')
(208, 'activation_67')
(209, 'conv2d_63')
(210, 'conv2d_68')
(211, 'batch_normalization_63')
(212, 'batch_normalization_68')
(213, 'activation_63')
(214, 'activation_68')
(215, 'average_pooling2d_7')
(216, 'conv2d_61')
(217, 'conv2d_64')
(218, 'conv2d_69')
(219, 'conv2d_70')
(220, 'batch_normalization_61')
(221, 'batch_normalization_64')
(222, 'batch_normalization_69')
(223, 'batch_normalization_70')
(224, 'activation_61')
(225, 'activation_64')
(226, 'activation_69')
(227, 'activation_70')
(228, 'mixed7')
(229, 'conv2d_73')
(230, 'batch_normalization_73')
(231, 'activation_73')
(232, 'conv2d_74')
(233, 'batch_normalization_74')
(234, 'activation_74')
(235, 'conv2d_71')
(236, 'conv2d_75')
(237, 'batch_normalization_71')
(238, 'batch_normalization_75')
(239, 'activation_71')
(240, 'activation_75')
(241, 'conv2d_72')
(242, 'conv2d_76')
(243, 'batch_normalization_72')
(244, 'batch_normalization_76')
(245, 'activation_72')
(246, 'activation_76')
(247, 'max_pooling2d_4')
(248, 'mixed8')
(249, 'conv2d_81')
(250, 'batch_normalization_81')
(251, 'activation_81')
(252, 'conv2d_78')
(253, 'conv2d_82')
(254, 'batch_normalization_78')
(255, 'batch_normalization_82')
(256, 'activation_78')
(257, 'activation_82')
(258, 'conv2d_79')
(259, 'conv2d_80')
(260, 'conv2d_83')
(261, 'conv2d_84')
(262, 'average_pooling2d_8')
(263, 'conv2d_77')
(264, 'batch_normalization_79')
(265, 'batch_normalization_80')
(266, 'batch_normalization_83')
(267, 'batch_normalization_84')
(268, 'conv2d_85')
(269, 'batch_normalization_77')
(270, 'activation_79')
(271, 'activation_80')
(272, 'activation_83')
(273, 'activation_84')
(274, 'batch_normalization_85')
(275, 'activation_77')
(276, 'mixed9_0')
(277, 'concatenate_1')
(278, 'activation_85')
(279, 'mixed9')
(280, 'conv2d_90')
(281, 'batch_normalization_90')
(282, 'activation_90')
(283, 'conv2d_87')
(284, 'conv2d_91')
(285, 'batch_normalization_87')
(286, 'batch_normalization_91')
(287, 'activation_87')
(288, 'activation_91')
(289, 'conv2d_88')
(290, 'conv2d_89')
(291, 'conv2d_92')
(292, 'conv2d_93')
(293, 'average_pooling2d_9')
(294, 'conv2d_86')
(295, 'batch_normalization_88')
(296, 'batch_normalization_89')
(297, 'batch_normalization_92')
(298, 'batch_normalization_93')
(299, 'conv2d_94')
(300, 'batch_normalization_86')
(301, 'activation_88')
(302, 'activation_89')
(303, 'activation_92')
(304, 'activation_93')
(305, 'batch_normalization_94')
(306, 'activation_86')
(307, 'mixed9_1')
(308, 'concatenate_2')
(309, 'activation_94')
(310, 'mixed10')
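
From the listing, index 248 is 'mixed8' and index 249 ('conv2d_81') is the first layer of the block after it, so the slice above freezes everything up to and including 'mixed8'. A quick check of the boundary:

# verify the freeze boundary (layer names taken from the listing above)
print(model.layers[248].name, model.layers[248].trainable)  # mixed8 False
print(model.layers[249].name, model.layers[249].trainable)  # conv2d_81 True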

In [12]:
# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy')

In [13]:
# same call as before, trimmed to the non-default arguments
model.fit(data, labels, batch_size=25, epochs=10, validation_split=0.3)


Train on 1409 samples, validate on 604 samples
Epoch 1/10
1409/1409 [==============================] - 120s - loss: 0.2915 - val_loss: 0.4194
Epoch 2/10
1409/1409 [==============================] - 117s - loss: 0.2077 - val_loss: 0.3832
Epoch 3/10
1409/1409 [==============================] - 118s - loss: 0.1928 - val_loss: 0.3669
Epoch 4/10
1409/1409 [==============================] - 118s - loss: 0.1402 - val_loss: 0.3569
Epoch 5/10
1409/1409 [==============================] - 116s - loss: 0.1801 - val_loss: 0.3554
Epoch 6/10
1409/1409 [==============================] - 122s - loss: 0.1401 - val_loss: 0.3538
Epoch 7/10
1409/1409 [==============================] - 116s - loss: 0.1389 - val_loss: 0.3516
Epoch 8/10
1409/1409 [==============================] - 117s - loss: 0.1294 - val_loss: 0.3490
Epoch 9/10
1409/1409 [==============================] - 115s - loss: 0.1269 - val_loss: 0.3440
Epoch 10/10
1409/1409 [==============================] - 119s - loss: 0.1213 - val_loss: 0.3421
Out[13]:
<keras.callbacks.History at 0x7f06e9793e10>
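
With fine-tuning done, a minimal inference sketch (the image path is a placeholder; the preprocessing must match whatever produced train.npy):

from keras.preprocessing import image
from keras.applications.inception_v3 import preprocess_input

# 'example.jpg' is a hypothetical test image
img = image.load_img('example.jpg', target_size=(299, 299))
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
print('class probabilities:', model.predict(x)[0])

You would typically also persist the result, e.g. with model.save('fine_tuned_inception.h5') (the filename is a placeholder).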